import torch
import torchvision
from torchvision import transforms
import random
import numpy as np
from scipy.ndimage import gaussian_filter1d
import matplotlib.pyplot as plt
from PIL import Image
import os
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'viridis'
Functions and useful variables¶
SQUEEZENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
SQUEEZENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
def preprocess(img, size=224):
    """Resize a PIL image, convert it to a normalized tensor and add a batch axis."""
    pipeline = [
        transforms.Resize(size),
        transforms.ToTensor(),
        # Normalize with the ImageNet statistics the pre-trained net expects.
        transforms.Normalize(mean=SQUEEZENET_MEAN.tolist(),
                             std=SQUEEZENET_STD.tolist()),
        transforms.Lambda(lambda x: x[None]),  # prepend a batch dimension
    ]
    return transforms.Compose(pipeline)(img)
def deprocess(img, should_rescale=True):
    """Invert `preprocess`: drop the batch axis, undo normalization, return a PIL image."""
    steps = [transforms.Lambda(lambda x: x[0])]
    # Undo Normalize in two stages: first divide out the std, then add back the mean.
    steps.append(transforms.Normalize(mean=[0, 0, 0], std=(1.0 / SQUEEZENET_STD).tolist()))
    steps.append(transforms.Normalize(mean=(-SQUEEZENET_MEAN).tolist(), std=[1, 1, 1]))
    # Optionally stretch values to [0, 1] so the image displays nicely.
    steps.append(transforms.Lambda(rescale) if should_rescale else transforms.Lambda(lambda x: x))
    steps.append(transforms.ToPILImage())
    return transforms.Compose(steps)(img)
def rescale(x):
    """Linearly map the values of `x` onto the range [0, 1]."""
    lo, hi = x.min(), x.max()
    return (x - lo) / (hi - lo)
def blur_image(X, sigma=1):
    """Gaussian-blur each image of X (N, C, H, W) along H then W, in place.

    Returns X itself after overwriting it with the blurred values.
    """
    smoothed = X.cpu().clone().detach().numpy()
    for axis in (2, 3):  # blur the height axis, then the width axis
        smoothed = gaussian_filter1d(smoothed, sigma, axis=axis)
    X.copy_(torch.Tensor(smoothed).type_as(X))
    return X
def jitter(X, ox, oy):
    """
    Helper function to randomly jitter an image.
    Inputs
    - X: PyTorch Tensor of shape (N, C, H, W)
    - ox, oy: Integers giving number of pixels to jitter along W and H axes
    Returns: A new PyTorch Tensor of shape (N, C, H, W)
    """
    # Circularly shifting by ox along W is the same as moving the last ox
    # columns to the front (and likewise for oy along H).
    if ox != 0:
        X = torch.cat([X[:, :, :, -ox:], X[:, :, :, :-ox]], dim=3)
    if oy != 0:
        X = torch.cat([X[:, :, -oy:], X[:, :, :-oy]], dim=2)
    return X
Load the model¶
For this TME, we will use SqueezeNet, a lightweight model pre-trained on ImageNet. This model will be frozen: the goal is not to modify or train the weights but to study them.
# Load a SqueezeNet pre-trained on ImageNet. The 'pretrained=' argument is
# deprecated since torchvision 0.13; IMAGENET1K_V1 is the exact equivalent of
# the old pretrained=True, so the behavior is unchanged.
model = torchvision.models.squeezenet1_1(
    weights=torchvision.models.SqueezeNet1_1_Weights.IMAGENET1K_V1
)
# Model in test mode (disables dropout etc.)
model.eval()
# Freeze the weights: we study the network, we never train it.
for param in model.parameters():
    param.requires_grad = False
/home/marilynch/mind/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. warnings.warn( /home/marilynch/mind/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=SqueezeNet1_1_Weights.IMAGENET1K_V1`. You can also use `weights=SqueezeNet1_1_Weights.DEFAULT` to get the most up-to-date weights. warnings.warn(msg)
Downloading: "https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth" to /home/marilynch/.cache/torch/hub/checkpoints/squeezenet1_1-b8a52dc0.pth
100%|██████████████████████████████████████████████████████████████████████████████| 4.73M/4.73M [00:01<00:00, 2.69MB/s]
Load example images¶
This will fill the variables X, y, class_names with 25 examples from the validation set of ImageNet. X contains the images, y the class index of each image, and class_names a dictionary giving the class name from its index.
# Download data
#!wget https://github.com/cdancette/deep-learning-polytech-tp6-7/raw/master/tp9/imagenet_val_25.npz
!wget https://github.com/rdfia/rdfia.github.io/raw/master/data/3-b/imagenet_val_25.npz
--2026-01-12 23:41:46-- https://github.com/rdfia/rdfia.github.io/raw/master/data/3-b/imagenet_val_25.npz Resolving github.com (github.com)... 140.82.121.3 Connecting to github.com (github.com)|140.82.121.3|:443... connected. HTTP request sent, awaiting response... 302 Found Location: https://raw.githubusercontent.com/rdfia/rdfia.github.io/master/data/3-b/imagenet_val_25.npz [following] --2026-01-12 23:41:47-- https://raw.githubusercontent.com/rdfia/rdfia.github.io/master/data/3-b/imagenet_val_25.npz Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.109.133, ... Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 3940548 (3.8M) [application/octet-stream] Saving to: ‘imagenet_val_25.npz’ imagenet_val_25.npz 100%[===================>] 3.76M 2.75MB/s in 1.4s 2026-01-12 23:41:49 (2.75 MB/s) - ‘imagenet_val_25.npz’ saved [3940548/3940548]
# Load the 25 ImageNet validation examples downloaded above.
f = np.load("imagenet_val_25.npz", allow_pickle=True)
# X holds the images, y the class index of each image, and class_names a
# dictionary mapping a class index to its name (see the description above).
X, y, class_names = f["X"], f["y"], f["label_map"].item()
# Inverse mapping: class name -> class index.
class_names_to_id = {name: id for id, name in class_names.items()}
plt.figure(figsize=(15, 7))
# Show the first 24 images in a 4 x 6 grid (the 25th does not fit the grid).
for i in range(24):
    plt.subplot(4, 6, i + 1)
    plt.imshow(X[i])
    plt.title(class_names[y[i]])
    plt.axis('off')
plt.gcf().tight_layout()
Saliency Maps¶
Calculate the saliency map for 5 examples out of the 25 loaded ones following the instructions of the TP guide.
Hint : To choose 1 particular value in each row of a matrix, you can do this:
x = torch.Tensor([[0.1, 0.0, 0.5, 0.1, 0.1],
[0.0, 0.1, 0.0, 0.6, 0.2],
[0.7, 0.1, 0.1, 0.3, 0.0]])
x[np.arange(3), [2, 3, 0]]
# 0.5000
# 0.6000
# 0.7000
#[torch.FloatTensor of size 3]
def compute_saliency_maps(X, y, model):
    """
    Compute a class saliency map using the model for images X and labels y.

    Input:
    - X: Input images; Tensor of shape (N, 3, H, W)
    - y: Labels for X; LongTensor of shape (N,)
    - model: A pretrained CNN that will be used to compute the saliency map.

    Returns:
    - saliency: A Tensor of shape (N, H, W) giving the saliency maps for the
      input images.
    """
    # Track gradients on the input pixels (requires_grad_ is the supported
    # in-place way to flip the flag).
    X.requires_grad_(True)

    # Forward pass: raw class scores (logits), shape (N, num_classes).
    scores = model(X)
    # Pick the logit of the correct class for every image; summing lets one
    # backward pass compute all per-image gradients at once (the gradient of
    # a sum distributes to each independent image).
    correct_class_scores = scores.gather(1, y.view(-1, 1)).sum()

    # Backward pass. Reset any stale gradients first so repeated calls on the
    # same tensor do not accumulate.
    model.zero_grad()
    if X.grad is not None:
        X.grad.zero_()
    correct_class_scores.backward()

    # Saliency = max over the 3 colour channels of |d(score)/d(pixel)|,
    # producing one 2D map per image.
    saliency = X.grad.abs().max(dim=1).values
    return saliency
Test your code with the following function:
def show_saliency_maps(X, y, model):
    """Display each image next to its saliency map (images on top, maps below)."""
    # Convert the numpy arrays into the tensors the model expects.
    X_tensor = torch.cat([preprocess(Image.fromarray(x)) for x in X], dim=0)
    y_tensor = torch.LongTensor(y)

    # Compute the saliency maps, then bring them back to numpy for plotting.
    saliency = compute_saliency_maps(X_tensor, y_tensor, model).numpy()

    N = X.shape[0]
    for i in range(N):
        # Top row: the original image with its class name.
        plt.subplot(2, N, i + 1)
        plt.imshow(X[i])
        plt.axis('off')
        plt.title(class_names[y[i]])
        # Bottom row: the corresponding saliency map.
        plt.subplot(2, N, N + i + 1)
        plt.imshow(saliency[i], cmap=plt.cm.hot)
        plt.axis('off')
    plt.gcf().set_size_inches(12, 5)
    plt.show()
# Show saliency maps for the first batch of 5 images.
for i in range(1): # range(5) to test all the images
    show_saliency_maps(X[5*i:5*i+5], y[5*i:5*i+5], model)
tensor(143.8850, grad_fn=<SumBackward0>)
Saliency Maps with VGG16¶
# Testing saliency maps with VGG16 (pre-trained on ImageNet).
import torchvision.models as models
from torchvision.models import VGG16_Weights

weights = VGG16_Weights.DEFAULT
vgg16 = torchvision.models.vgg16(weights=weights)

# NOTE(review): the original relied on the loop variable `i` left over from
# the previous cell; make the batch index explicit so this cell is
# self-contained (i was 0 there, so the behavior is unchanged).
i = 0  # first batch of 5 images
show_saliency_maps(X[5*i:5*i+5], y[5*i:5*i+5], vgg16)
tensor(130.3640, grad_fn=<SumBackward0>)
Adversarial Examples (Fooling Images)¶
Write the code to calculate an image such that it will be classified in a target_y different from the real class (by modifying the image and not the network parameters). See the TP guide for instructions.
The first two blocks will allow you to perform tests in an interactive way to write and test your code. Once your code seems to work, complete the function in the 3rd block and test on various images in the 4th block.
# Initialize tests: a single preprocessed image and an adversarial target.
X_tensor = torch.Tensor(preprocess(Image.fromarray(X[0])))
target_y = class_names_to_id['stingray'] # Desired (wrong) class
X_fooling = X_tensor.clone()
X_fooling.requires_grad = True
learning_rate = 1
# TODO write your code to test here

# Visualize the image X_fooling and its modifications
plt.subplot(1, 2, 1)
plt.imshow(np.asarray(deprocess(X_fooling.clone())).astype(np.uint8))
plt.title("Image X_fooling")
plt.subplot(1, 2, 2)
# The perturbation is tiny, so magnify the difference x10 to make it visible.
plt.imshow(np.asarray(deprocess(10* (X_fooling - X_tensor), should_rescale=False)))
plt.title("Magnified difference with X_tensor (x10)")
plt.show()
def make_fooling_image(X, target_y, model):
    """
    Generate a fooling image that is close to X, but that the model classifies
    as target_y.

    Inputs:
    - X: Input image; Tensor of shape (1, 3, 224, 224)
    - target_y: An integer in the range [0, 1000)
    - model: A pretrained CNN

    Returns:
    - X_fooling: An image that is close to X, but that is classified as
      target_y by the model. If the model is not fooled within the iteration
      budget, the last iterate is returned anyway.
    """
    # Optimize a copy of the input pixels; the network weights stay frozen.
    X_fooling = X.clone()
    X_fooling.requires_grad = True
    learning_rate = 1

    model.eval()
    max_iters = 100  # usually fooled well before this (see the hint above)
    for _ in range(max_iters):
        # Forward pass and current prediction.
        scores = model(X_fooling)
        if scores.argmax(dim=1).item() == target_y:
            return X_fooling  # the model is fooled, stop here

        # Gradient ascent on the raw score (logit) of the target class.
        # (The leftover debug print of the score was removed.)
        target_score = scores[0, target_y]
        model.zero_grad()
        X_fooling.grad = None
        target_score.backward()

        # Normalized step: dX = learning_rate * grad / ||grad||_2.
        grad = X_fooling.grad
        step = learning_rate * grad / torch.norm(grad)

        # The update itself must not be tracked by autograd.
        with torch.no_grad():
            X_fooling += step

    # Iteration budget exhausted without fooling the model.
    return X_fooling
# Index of the image to modify and the target class
idx = 1
target_y = class_names_to_id['stingray']

# Preparation of tensor X and its "fooling" version
X_tensor = torch.cat([preprocess(Image.fromarray(x)) for x in X], dim=0)
X_fooling = make_fooling_image(X_tensor[idx:idx+1], target_y, model)

# Check that the network now predicts the target class
scores = model(X_fooling)
assert target_y == scores.data.max(1)[1][0], 'The model is not fooled!'

# Display: original, fooling image, raw difference, magnified difference
X_fooling_np = deprocess(X_fooling.clone())
X_fooling_np = np.asarray(X_fooling_np).astype(np.uint8)
plt.subplot(1, 4, 1)
plt.imshow(X[idx])
plt.title(class_names[y[idx]])
plt.axis('off')
plt.subplot(1, 4, 2)
plt.imshow(X_fooling_np)
plt.title(class_names[target_y])
plt.axis('off')
plt.subplot(1, 4, 3)
X_pre = preprocess(Image.fromarray(X[idx]))
diff = np.asarray(deprocess(X_fooling - X_pre, should_rescale=False))
plt.imshow(diff)
plt.title('Difference')
plt.axis('off')
plt.subplot(1, 4, 4)
# x10 magnification makes the small adversarial perturbation visible
diff = np.asarray(deprocess(10 * (X_fooling - X_pre), should_rescale=False))
plt.imshow(diff)
plt.title('Magnified difference (10x)')
plt.axis('off')
plt.gcf().set_size_inches(12, 5)
plt.show()
tensor(7.4139, grad_fn=<SelectBackward0>) tensor(11.3495, grad_fn=<SelectBackward0>) tensor(15.7549, grad_fn=<SelectBackward0>) tensor(20.1402, grad_fn=<SelectBackward0>) tensor(24.6926, grad_fn=<SelectBackward0>) tensor(29.0855, grad_fn=<SelectBackward0>) tensor(33.6584, grad_fn=<SelectBackward0>) tensor(38.5288, grad_fn=<SelectBackward0>) tensor(42.8886, grad_fn=<SelectBackward0>) tensor(46.5208, grad_fn=<SelectBackward0>) tensor(49.6670, grad_fn=<SelectBackward0>)
Bonus : test with different input images and different target classes.¶
class_names
# Index of the image to modify and the target class
idx = 4
target_y = class_names_to_id['hen']

# Preparation of tensor X and its "fooling" version
X_tensor = torch.cat([preprocess(Image.fromarray(x)) for x in X], dim=0)
X_fooling = make_fooling_image(X_tensor[idx:idx+1], target_y, model)

# Check that the network now predicts the target class
scores = model(X_fooling)
assert target_y == scores.data.max(1)[1][0], 'The model is not fooled!'

# Display: original, fooling image, raw difference, magnified difference
X_fooling_np = deprocess(X_fooling.clone())
X_fooling_np = np.asarray(X_fooling_np).astype(np.uint8)
plt.subplot(1, 4, 1)
plt.imshow(X[idx])
plt.title(class_names[y[idx]])
plt.axis('off')
plt.subplot(1, 4, 2)
plt.imshow(X_fooling_np)
plt.title(class_names[target_y])
plt.axis('off')
plt.subplot(1, 4, 3)
X_pre = preprocess(Image.fromarray(X[idx]))
diff = np.asarray(deprocess(X_fooling - X_pre, should_rescale=False))
plt.imshow(diff)
plt.title('Difference')
plt.axis('off')
plt.subplot(1, 4, 4)
# x10 magnification makes the small adversarial perturbation visible
diff = np.asarray(deprocess(10 * (X_fooling - X_pre), should_rescale=False))
plt.imshow(diff)
plt.title('Magnified difference (10x)')
plt.axis('off')
plt.gcf().set_size_inches(12, 5)
plt.show()
tensor(8.8568, grad_fn=<SelectBackward0>) tensor(13.9808, grad_fn=<SelectBackward0>) tensor(20.4337, grad_fn=<SelectBackward0>) tensor(26.9275, grad_fn=<SelectBackward0>)
# Index of the image to modify and the target class
idx = 2
target_y = class_names_to_id['bulbul']

# Preparation of tensor X and its "fooling" version
X_tensor = torch.cat([preprocess(Image.fromarray(x)) for x in X], dim=0)
X_fooling = make_fooling_image(X_tensor[idx:idx+1], target_y, model)

# Check that the network now predicts the target class
scores = model(X_fooling)
assert target_y == scores.data.max(1)[1][0], 'The model is not fooled!'

# Display: original, fooling image, raw difference, magnified difference
X_fooling_np = deprocess(X_fooling.clone())
X_fooling_np = np.asarray(X_fooling_np).astype(np.uint8)
plt.subplot(1, 4, 1)
plt.imshow(X[idx])
plt.title(class_names[y[idx]])
plt.axis('off')
plt.subplot(1, 4, 2)
plt.imshow(X_fooling_np)
plt.title(class_names[target_y])
plt.axis('off')
plt.subplot(1, 4, 3)
X_pre = preprocess(Image.fromarray(X[idx]))
diff = np.asarray(deprocess(X_fooling - X_pre, should_rescale=False))
plt.imshow(diff)
plt.title('Difference')
plt.axis('off')
plt.subplot(1, 4, 4)
# x10 magnification makes the small adversarial perturbation visible
diff = np.asarray(deprocess(10 * (X_fooling - X_pre), should_rescale=False))
plt.imshow(diff)
plt.title('Magnified difference (10x)')
plt.axis('off')
plt.gcf().set_size_inches(12, 5)
plt.show()
tensor(4.6396, grad_fn=<SelectBackward0>) tensor(9.4648, grad_fn=<SelectBackward0>) tensor(16.5036, grad_fn=<SelectBackward0>) tensor(23.3633, grad_fn=<SelectBackward0>) tensor(29.6256, grad_fn=<SelectBackward0>) tensor(35.8212, grad_fn=<SelectBackward0>) tensor(39.9245, grad_fn=<SelectBackward0>)
Class visualization¶
Write the code which generates an image maximizing the score of a class, subject to a certain number of regularizations. See the TP guide for details.
def create_class_visualization(target_y, model, dtype, init_img=None, l2_reg=1e-3, learning_rate=5,
                               num_iterations=200, blur_every=10, max_jitter=16, show_every=25):
    """
    Generate an image to maximize the score of target_y under a pretrained model.

    Inputs:
    - target_y: Integer in the range [0, 1000) giving the index of the class
    - model: A pretrained CNN that will be used to generate the image
    - dtype: Torch datatype to use for computations

    Keyword arguments:
    - init_img: Initial image to use (if None, will be random)
    - l2_reg: Strength of L2 regularization on the image
    - learning_rate: How big of a step to take
    - num_iterations: How many iterations to use
    - blur_every: How often to blur the image as an implicit regularizer
    - max_jitter: How much to jitter the image as an implicit regularizer
    - show_every: How often to show the intermediate result
    """
    model.type(dtype)
    # Randomly initialize the image as a PyTorch Tensor (or copy init_img).
    if init_img is None:
        img = torch.randn(1, 3, 224, 224).mul_(1.0).type(dtype).detach()
    else:
        img = init_img.clone().mul_(1.0).type(dtype).detach()
    img.requires_grad = True
    for t in range(num_iterations):
        # Randomly jitter the image a bit; this gives slightly nicer results.
        ox, oy = random.randint(0, max_jitter), random.randint(0, max_jitter)
        # jitter() builds a new tensor via cat(); clone().detach() makes it a
        # fresh leaf that can receive gradients for this iteration.
        img = (jitter(img, ox, oy)).clone().detach()
        img.requires_grad = True
        # Forward pass: raw class scores (logits) for the current image.
        output = model(img)
        class_score = output[0, target_y]
        # Objective to maximize: target logit minus an L2 penalty on the image.
        # NOTE(review): torch.norm gives ||img||_2, not ||img||_2^2 — confirm
        # this matches the regularizer asked for in the TP guide.
        l2_norm = torch.norm(img)
        loss = class_score - l2_reg * l2_norm
        # Backward pass (reset any stale gradients first).
        model.zero_grad()
        if img.grad is not None:
            img.grad.zero_()
        loss.backward()
        # Gradient ascent on the pixels (we maximize, hence +=).
        with torch.no_grad():
            img += learning_rate * img.grad
        # Undo the random jitter
        img.data.copy_(jitter(img, -ox, -oy))
        img = img.clone()
        # As regularizer, clamp each channel to the valid normalized pixel
        # range and periodically blur the image.
        for c in range(3):
            lo = float(-SQUEEZENET_MEAN[c] / SQUEEZENET_STD[c])
            hi = float((1.0 - SQUEEZENET_MEAN[c]) / SQUEEZENET_STD[c])
            img[:, c].clamp_(min=lo, max=hi)
        if t % blur_every == 0:
            blur_image(img, sigma=0.5)
        # Periodically show the image
        if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
            plt.imshow(deprocess(img.clone().cpu()))
            class_name = class_names[target_y]
            plt.title('%s\nIteration %d / %d' % (class_name, t + 1, num_iterations))
            plt.gcf().set_size_inches(4, 4)
            plt.axis('off')
            plt.show()
    return deprocess(img.cpu())
Test with various classes and starting from random noise:
dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor # Uncomment this to use GPU
model.type(dtype)
# Some possible target classes:
# target_y = 76 # Tarantula
# target_y = 78 # Tick
# target_y = 187 # Yorkshire Terrier
# target_y = 683 # Oboe
# target_y = 366 # Gorilla
# target_y = 604 # Hourglass
target_y = 113 # Snail
# target_y = np.random.randint(1000) # Random class
out = create_class_visualization(target_y, model, dtype, show_every=25, num_iterations=200)
Varying Learning rate, iterations, and L2 regularization term¶
# Sweep the learning rate while iterations and L2 regularization stay fixed.
learning_rates = [1, 3, 5, 7, 9]
for lr in learning_rates:
    print(f"Learning rate = {lr}, iterations = 200, l2_reg = 1e-3")
    out = create_class_visualization(target_y, model, dtype, learning_rate = lr, show_every=200, num_iterations=200)
Learning rate = 1, iterations = 200, l2_reg = 1e-3
Learning rate = 3, iterations = 200, l2_reg = 1e-3
Learning rate = 5, iterations = 200, l2_reg = 1e-3
Learning rate = 7, iterations = 200, l2_reg = 1e-3
Learning rate = 9, iterations = 200, l2_reg = 1e-3
# Sweep the number of iterations (show_every=itera shows only the final image).
iterations = [100, 200, 300, 400, 500]
for itera in iterations:
    print(f"Learning rate = 5, iterations = {itera}, l2_reg = 1e-3")
    out = create_class_visualization(target_y, model, dtype, show_every=itera, num_iterations=itera)
Learning rate = 5, iterations = 100, l2_reg = 1e-3
Learning rate = 5, iterations = 200, l2_reg = 1e-3
Learning rate = 5, iterations = 300, l2_reg = 1e-3
Learning rate = 5, iterations = 400, l2_reg = 1e-3
Learning rate = 5, iterations = 500, l2_reg = 1e-3
# Sweep the L2 regularization strength with everything else held fixed.
l2_regs = [0.0001, 0.001, 0.01, 0.1, 1]
for l2 in l2_regs:
    print(f"Learning rate = 5, iterations = 200, l2_reg = {l2}")
    out = create_class_visualization(target_y, model, dtype, l2_reg = l2, show_every=200, num_iterations=200)
Learning rate = 5, iterations = 200, l2_reg = 0.0001
Learning rate = 5, iterations = 200, l2_reg = 0.001
Learning rate = 5, iterations = 200, l2_reg = 0.01
Learning rate = 5, iterations = 200, l2_reg = 0.1
Learning rate = 5, iterations = 200, l2_reg = 1
Testing with an image¶
Test by starting from an image from ImageNet:
# Initialize test: start the class visualization from a real ImageNet image
# instead of random noise.
img_ind = 0
target_y = 113 # snail
X_tensor = torch.Tensor(preprocess(Image.fromarray(X[img_ind])))
out = create_class_visualization(target_y, model, dtype, init_img=X_tensor, show_every=25, num_iterations=200)
Class visualization with VGG16¶
dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor # Uncomment this to use GPU
model.type(dtype)
# Some possible target classes:
# target_y = 76 # Tarantula
# target_y = 78 # Tick
# target_y = 187 # Yorkshire Terrier
# target_y = 683 # Oboe
# target_y = 366 # Gorilla
# target_y = 604 # Hourglass
target_y = 113 # Snail
# target_y = np.random.randint(1000) # Random class
# Same class visualization, but driven by VGG16 instead of SqueezeNet.
out = create_class_visualization(target_y, vgg16, dtype, show_every=25, num_iterations=200)
# Initialize test: class visualization with VGG16 starting from a real image.
img_ind = 0
target_y = 113 # snail
X_tensor = torch.Tensor(preprocess(Image.fromarray(X[img_ind])))
out = create_class_visualization(target_y, vgg16, dtype, init_img=X_tensor, show_every=25, num_iterations=200)